In [1]:
%env SODAPY_APPTOKEN = HA1B7dCJquyL8zRLXZ2YlWvNQ
!pip install sodapy
!pip install -U plotly
# !pip install geopandas
env: SODAPY_APPTOKEN=HA1B7dCJquyL8zRLXZ2YlWvNQ
Collecting sodapy
  Downloading sodapy-2.1.0-py2.py3-none-any.whl (14 kB)
Requirement already satisfied: requests>=2.20.0 in /usr/local/lib/python3.7/dist-packages (from sodapy) (2.23.0)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20.0->sodapy) (3.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20.0->sodapy) (2021.10.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20.0->sodapy) (1.24.3)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.20.0->sodapy) (2.10)
Installing collected packages: sodapy
Successfully installed sodapy-2.1.0
Requirement already satisfied: plotly in /usr/local/lib/python3.7/dist-packages (4.4.1)
Collecting plotly
  Downloading plotly-5.4.0-py2.py3-none-any.whl (25.3 MB)
     |████████████████████████████████| 25.3 MB 54.3 MB/s 
Collecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from plotly) (1.15.0)
Installing collected packages: tenacity, plotly
  Attempting uninstall: plotly
    Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-5.4.0 tenacity-8.0.1
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
     |████████████████████████████████| 1.0 MB 8.7 MB/s 
Requirement already satisfied: pandas>=0.25.0 in /usr/local/lib/python3.7/dist-packages (from geopandas) (1.1.5)
Requirement already satisfied: shapely>=1.6 in /usr/local/lib/python3.7/dist-packages (from geopandas) (1.8.0)
Collecting fiona>=1.8
  Downloading Fiona-1.8.20-cp37-cp37m-manylinux1_x86_64.whl (15.4 MB)
     |████████████████████████████████| 15.4 MB 28.5 MB/s 
Collecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
     |████████████████████████████████| 6.3 MB 37.2 MB/s 
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Requirement already satisfied: certifi in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (2021.10.8)
Requirement already satisfied: attrs>=17 in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (21.2.0)
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (57.4.0)
Requirement already satisfied: click>=4.0 in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (7.1.2)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Requirement already satisfied: six>=1.7 in /usr/local/lib/python3.7/dist-packages (from fiona>=1.8->geopandas) (1.15.0)
Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.25.0->geopandas) (1.19.5)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.25.0->geopandas) (2.8.2)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.25.0->geopandas) (2018.9)
Installing collected packages: munch, cligj, click-plugins, pyproj, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.20 geopandas-0.10.2 munch-2.5.0 pyproj-3.2.1
In [2]:
import os
from urllib.request import urlopen
import json
import pandas as pd
import numpy as np
from sodapy import Socrata
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px
# import geopandas as gpd
import folium

Import data & preprocess

In [5]:
def get_dataset(dataset_id):
  apptoken = os.environ.get("SODAPY_APPTOKEN") # Anonymous app token
  domain = "data.melbourne.vic.gov.au"
  client = Socrata(domain, apptoken) # Open Dataset connection

  dataresource = client.get_all(dataset_id)
  dataset = pd.DataFrame(dataresource)

  return dataset
In [11]:
#Fetching dataset
dt_id_seats = "dyqx-cfn5"
dt_id_capacity = "9hjf-8i2d"
# smallAreaLayer_id = "gei8-3w86"

df_seats = get_dataset(dt_id_seats)
df_capacity = get_dataset(dt_id_capacity)
In [14]:
int_cols = ['census_year','industry_anzsic4_code','block_id','number_of_seats']
float_cols = ['x_coordinate', 'y_coordinate']

df_seats[int_cols] = df_seats[int_cols].astype(int)
df_seats[float_cols] = df_seats[float_cols].astype(float)
df_seats = df_seats.convert_dtypes() # convert remaining to string
df_seats = df_seats.dropna(axis=0)

print(f'The shape of number of seatings dataset is {df_seats.shape}.')
df_seats.head(10)
The shape of number of seatings dataset is (3236, 13).
Out[14]:
census_year block_id property_id base_property_id street_address clue_small_area trading_name industry_anzsic4_code industry_anzsic4_description seating_type number_of_seats x_coordinate y_coordinate
0 2020 1 611394 611394 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 551 Flinders Street MELBOURNE VIC 3000 4511 Cafes and Restaurants Seats - Indoor 60 144.956514 -37.820979
1 2020 1 611394 611394 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 551 Flinders Street MELBOURNE VIC 3000 4511 Cafes and Restaurants Seats - Outdoor 6 144.956514 -37.820979
2 2020 1 611394 611394 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 553 Flinders Street MELBOURNE VIC 3000 4512 Takeaway Food Services Seats - Indoor 12 144.956514 -37.820979
3 2020 1 611394 611394 545-557 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 547 Flinders Street MELBOURNE VIC 3000 4511 Cafes and Restaurants Seats - Indoor 44 144.956514 -37.820979
4 2020 1 611395 611395 561-581 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 563 Flinders Street MELBOURNE VIC 3000 4512 Takeaway Food Services Seats - Indoor 12 144.955909 -37.821087
5 2020 1 611395 611395 561-581 Flinders Street MELBOURNE VIC 3000 Melbourne (CBD) 563 Flinders Street MELBOURNE VIC 3000 4512 Takeaway Food Services Seats - Outdoor 16 144.955909 -37.821087
6 2020 2 111467 103973 Sea Life Melbourne Aquarium King Street MELBOU... Melbourne (CBD) Sea Life Melbourne Aquarium King Street MELBOU... 8921 Zoological and Botanical Gardens Operation Seats - Indoor 164 144.958508 -37.820589
7 2020 2 111467 103973 Sea Life Melbourne Aquarium King Street MELBOU... Melbourne (CBD) Sea Life Melbourne Aquarium King Street MELBOU... 8921 Zoological and Botanical Gardens Operation Seats - Outdoor 34 144.958508 -37.820589
8 2020 4 104034 104034 Flinders Street Railway Station 207-361 Flinde... Melbourne (CBD) Suite 30, Ground 207-361 Flinders Street MELBO... 4511 Cafes and Restaurants Seats - Outdoor 8 144.965827 -37.818693
9 2020 4 104034 104034 Flinders Street Railway Station 207-361 Flinde... Melbourne (CBD) Suite 30, Ground 207-361 Flinders Street MELBO... 4511 Cafes and Restaurants Seats - Indoor 48 144.965827 -37.818693
In [24]:
int_cols = ['census_year','block_id','number_of_patrons']
float_cols = ['x_coordinate', 'y_coordinate']

df_capacity[int_cols] = df_capacity[int_cols].astype(int)
df_capacity[float_cols] = df_capacity[float_cols].astype(float)
df_capacity = df_capacity.convert_dtypes() # convert remaining to string
df_capacity = df_capacity.dropna(axis=0)

print(f'The shape of number of patrons(capacity) dataset is {df_capacity.shape}.')
df_capacity.head()
The shape of number of patrons(capacity) dataset is (252, 10).
Out[24]:
census_year block_id property_id base_property_id street_address clue_small_area trading_name number_of_patrons x_coordinate y_coordinate
0 2020 6 578321 573333 Atrium Federation Square 2 Swanston Street MEL... Melbourne (CBD) Tenancy 15, Atrium Federation Square 2 Swansto... 166 144.969824 -37.817789
1 2020 6 578321 573333 Atrium Federation Square 2 Swanston Street MEL... Melbourne (CBD) Tenancy 17 & 23, Lvls 1-2 Atrium Federation Sq... 1000 144.969824 -37.817789
2 2020 6 578324 573333 Tenancy 29, Transport Federation Square 2 Swan... Melbourne (CBD) Tenancy 29, Ground 2 Swanston Street MELBOURNE... 600 144.969824 -37.817789
3 2020 6 578324 573333 Tenancy 29, Transport Federation Square 2 Swan... Melbourne (CBD) Tenancy 29, Level 2 2 Swanston Street MELBOURN... 200 144.969824 -37.817789
4 2020 6 578327 573333 Tenancy 18, Zinc 2 Swanston Street MELBOURNE V... Melbourne (CBD) Tenancy 18, Zinc 2 Swanston Street MELBOURNE V... 1000 144.969824 -37.817789

EDA

Aggregate statistics of the datasets

In [36]:
# For number of seats
groupbyfields = ['x_coordinate','y_coordinate','street_address','industry_anzsic4_description']
aggregatebyfields = {'number_of_seats': ["sum"]}

seat_stats = pd.DataFrame(df_seats.groupby(groupbyfields, as_index=False).agg(aggregatebyfields))
seat_stats.columns = seat_stats.columns.map(''.join) # flatten column header
seat_stats.rename(columns={
    'industry_anzsic4_description':'industry',
    'number_of_seatssum': 'number_of_seats',
    'x_coordinate': 'lon',
    'y_coordinate': 'lat',
    },
    inplace=True) #rename to match GeoJSON extract

# Convert (x, y)'s to Geopandas POINT's
# seat_stats['geometry'] = gpd.points_from_xy(seat_stats['x_coordinate'], seat_stats['y_coordinate'])
seat_stats['number_of_seats'] = seat_stats['number_of_seats'].astype(int)

print(seat_stats.shape)
seat_stats.head(10)
(1518, 5)
Out[36]:
lon lat street_address industry number_of_seats
0 144.904292 -37.825356 263-329 Lorimer Street PORT MELBOURNE VIC 3207 Cafes and Restaurants 270
1 144.904984 -37.826478 262-276 Lorimer Street PORT MELBOURNE VIC 3207 Cafes and Restaurants 44
2 144.906336 -37.827977 38-46 Sabre Drive PORT MELBOURNE VIC 3207 Cafes and Restaurants 78
3 144.909504 -37.826756 34 Wirraway Drive PORT MELBOURNE VIC 3207 Bakery Product Manufacturing (Non-factory based) 36
4 144.910561 -37.826391 187 Todd Road PORT MELBOURNE VIC 3207 Cafes and Restaurants 30
5 144.910631 -37.803809 93-119 Sims Street WEST MELBOURNE VIC 3003 Takeaway Food Services 16
6 144.912861 -37.789061 Flemington Racecourse 428-514 Epsom Road FLEMI... Horse and Dog Racing Administration and Track ... 3660
7 144.912861 -37.789061 Flemington Racecourse 428-514 Epsom Road FLEMI... Other Gambling Activities 100
8 144.915100 -37.782253 Showgrounds Village 320-386 Epsom Road FLEMING... Cafes and Restaurants 330
9 144.915100 -37.782253 Showgrounds Village 320-386 Epsom Road FLEMING... Takeaway Food Services 14
In [39]:
# For number of seats
groupbyfields = ['x_coordinate','y_coordinate','street_address']
aggregatebyfields = {'number_of_patrons': ["sum"]}

capacity_stats = pd.DataFrame(df_capacity.groupby(groupbyfields, as_index=False).agg(aggregatebyfields))
capacity_stats.columns = capacity_stats.columns.map(''.join) # flatten column header
capacity_stats.rename(columns={
    'industry_anzsic4_description':'industry',
    'number_of_patronssum': 'number_of_patrons',
    'x_coordinate': 'lon',
    'y_coordinate': 'lat',
    },
    inplace=True) #rename to match GeoJSON extract

# Convert (x, y)'s to Geopandas POINT's
# capacity_stats['geometry'] = gpd.points_from_xy(capacity_stats['x_coordinate'], capacity_stats['y_coordinate'])
capacity_stats['number_of_patrons'] = capacity_stats['number_of_patrons'].astype(int)

print(capacity_stats.shape)
capacity_stats.head(10)
(215, 4)
Out[39]:
lon lat street_address number_of_patrons
0 144.932195 -37.789322 RSL 25-27 Rankins Road KENSINGTON VIC 3031 90
1 144.932365 -37.788612 The Quiet Man Hotel 265-271 Racecourse Road KE... 400
2 144.934344 -37.798101 38-44 Barrett Street KENSINGTON VIC 3031 75
3 144.935192 -37.790453 40-48 Stubbs Street KENSINGTON VIC 3031 80
4 144.935647 -37.812139 O'Brien Icehouse 99-127 Pearl River Road DOCKL... 100
5 144.937759 -37.813173 NW Building 23-37 Star Crescent DOCKLANDS VIC ... 700
6 144.939207 -37.815039 East Building Newquay Central 425-427 Dockland... 1059
7 144.942172 -37.814363 24-42 Newquay Promenade DOCKLANDS VIC 3008 321
8 144.946988 -37.807405 McMahon's Hotel 573-579 Spencer Street WEST ME... 160
9 144.947471 -37.816487 Marvel Stadium 122-148 Harbour Esplanade DOCKL... 5820

Visualise on map

Colour mappping for types of industry

In [43]:
# Prepare colour dictionary for industries
keys = list(seat_stats['industry'].unique())
color_range = list(np.linspace(0, 1, len(keys), endpoint=False))
colors = [matplotlib.colors.to_hex(plt.cm.tab20(x)) for x in color_range]
color_dict_industry = dict(zip(keys, colors))

Layer for displaying number of seats

In [59]:
# Number of seats layer:
num_seats = folium.FeatureGroup(name="Number of Seats",
                                show=True,)


for i in range(0,len(seat_stats)):

  circle_styles = {
    'fill': True,
    'color': color_dict_industry[seat_stats.iloc[i]['industry']],
    'weight': 1.5,
    # 'fillOpacity': 1
  }

  html=f"""
      <h5>{seat_stats.iloc[i]['street_address']}</h5>
      <p>Number of seats: {seat_stats.iloc[i]['number_of_seats']}</p>
      <p>Industry: {seat_stats.iloc[i]['industry']}</p>
      """
  iframe = folium.IFrame(html=html, width=200, height=200)
  popup = folium.Popup(iframe, max_width=2650)

  folium.Circle(
    location=[seat_stats.iloc[i]['lat'], seat_stats.iloc[i]['lon']],
    popup=popup,
    radius=float(seat_stats.iloc[i]['number_of_seats'])*0.1, 
    **circle_styles
  ).add_to(num_seats)

Layer for displaying number of patrons

In [58]:
# Number of patrons layer:
num_capacity = folium.FeatureGroup(name="Number of Patrons (Capacity) of Bars/taverns/pubs", 
                                   show=False,)


for i in range(0,len(capacity_stats)):

  circle_styles = {
    'fill': True,
    'color': color_dict_industry['Pubs, Taverns and Bars'],
    'weight': 1.5,
    # 'fillOpacity': 1
  }

  html=f"""
      <h5>{capacity_stats.iloc[i]['street_address']}</h5>
      <p>Number of Patrons (Capacity): {capacity_stats.iloc[i]['number_of_patrons']}</p>
      """
  iframe = folium.IFrame(html=html, width=200, height=200)
  popup = folium.Popup(iframe, max_width=2650)

  folium.Circle(
    location=[capacity_stats.iloc[i]['lat'], capacity_stats.iloc[i]['lon']],
    popup=popup,
    radius=float(capacity_stats.iloc[i]['number_of_patrons'])*0.1, 
    **circle_styles
  ).add_to(num_capacity)
In [61]:
map = folium.Map(location=[-37.813, 144.945], tiles="CartoDB dark_matter", zoom_start=13)

num_seats.add_to(map)
num_capacity.add_to(map)
folium.LayerControl(collapsed=False).add_to(map)

# Show the map
map
Out[61]:
Make this Notebook Trusted to load map: File -> Trust Notebook